Chapter 4 Community composition

load("data/data.Rdata")
quality <- read_tsv("results/quality.tsv")
Rows: 190 Columns: 2
── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): microsample
dbl (1): quality

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

4.1 Taxonomy barplot

4.1.1 Positive samples, coverage-filtered

#Get phylum colors from the EHI standard
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    select(phylum, colors) %>% 
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()
Rows: 202 Columns: 2
── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  mutate(section=unlist(section)) %>%
  filter(!is.na(count)) %>%
  filter(count > 0) %>%
  filter(section != "Ileum") %>%
  filter(type == "Positive") %>%
  filter(quality == 5) %>%
  ggplot(., aes(x=count,y=microsample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors[-4]) +
    labs(x = "Relative abundance", y="Microsamples") +
    facet_nested(cryosection  ~ .,  scales="free_y") + #facet per day and treatment
 guides(fill = guide_legend(ncol = 1)) +
    theme(strip.text.y = element_text(angle = 0),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.spacing = unit(0, "lines")) +
   labs(fill="Phylum")

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  mutate(section=unlist(section)) %>%
  filter(!is.na(count)) %>%
  filter(count > 0) %>%
  filter(section != "Ileum") %>%
  filter(type == "Positive") %>%
  filter(quality == 5) %>%
  mutate(genome=factor(genome,levels=genome_tree$tip.label)) %>% 
  ggplot(aes(x=count, y=microsample, fill=genus, group=genus)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    labs(x = "Relative abundance", y="Microsamples") +
    facet_nested(cryosection  ~ .,  scales="free_y") + #facet per day and treatment
 guides(fill = guide_legend()) +
    theme(strip.text.y = element_text(angle = 0),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.spacing = unit(0, "lines"),
          legend.position="bottom") +
   labs(fill="Genus")

4.1.2 Positive samples, coverage-unfiltered

#Get phylum colors from the EHI standard
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    select(phylum, colors) %>% 
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()
Rows: 202 Columns: 2
── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genome_counts %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  mutate(section=unlist(section)) %>%
  filter(!is.na(count)) %>%
  filter(count > 0) %>%
  filter(section != "Ileum") %>%
  filter(type == "Positive") %>%
  filter(quality == 5) %>%
  ggplot(., aes(x=count,y=microsample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    labs(x = "Relative abundance", y="Microsamples") +
    facet_nested(cryosection ~ .,  scales="free_y") + #facet per day and treatment
 guides(fill = guide_legend(ncol = 1)) +
    theme(strip.text.y = element_text(angle = 0),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.spacing = unit(0, "lines")) +
   labs(fill="Phylum")

4.1.3 Control samples, coverage-unfiltered

#Get phylum colors from the EHI standard
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    #filter(phylum %in% c("p__Actinomycetota","p__Bacillota","p__Bacillota_A","p__Pseudomonadota","p__Verrucomicrobiota")) %>%
    select(phylum, colors) %>% 
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()
Rows: 202 Columns: 2
── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genome_counts %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  filter(is.na(Xcoord)) %>%
  filter(type %in% c("NegativeMembrane","NegativeCollection","NegativeReaction")) %>%
  ggplot(., aes(x=count,y=microsample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    labs(x = "Relative abundance", y="Membrane controls") +
    facet_nested(cryosection + type ~ .,  scales="free_y") + #facet per day and treatment
 guides(fill = guide_legend(ncol = 1)) +
    theme(strip.text.y = element_text(angle = 0),
          axis.text.y = element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.spacing = unit(0, "lines")) +
   labs(fill="Phylum")

vertical_tree <- force.ultrametric(genome_tree,method="extend") %>%
        ggtree(., size = 0.3) + geom_tiplab()
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
Also defined by 'tidytree'
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)
Rows: 202 Columns: 2
── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
  select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()
Rows: 202 Columns: 2
── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): phylum, colors

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vertical_tree <- gheatmap(vertical_tree, phylum_colors, offset=-0.3, width=0.1, colnames=FALSE) +
    scale_fill_manual(values=colors_alphabetic) +
    new_scale_fill()
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
sample_selection <- sample_metadata %>%
      filter(!is.na(Xcoord)) %>%
      left_join(quality, by=join_by(microsample==microsample)) %>%
      filter(quality>=5) %>% select(microsample) %>% pull()

genome_counts_selected <- genome_counts_filt %>%
          select(all_of(c("genome",sample_selection))) %>% column_to_rownames(var="genome") %>% tss()

vertical_tree <- gheatmap(vertical_tree, genome_counts_selected, offset=-0.2, width=0.5, colnames=FALSE, colnames_angle=90, font.size=3, colnames_position="top", colnames_offset_y = 15) +
    vexpand(.08) +
    coord_cartesian(clip = "off") +
    scale_fill_gradient(low = "#f4f4f4", high = "#315b7d", na.value="#f4f4f4") +
    new_scale_fill()
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
vertical_tree

4.2 Genus overview

genus_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "microsample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(microsample == microsample)) %>% #append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(quality, by = join_by(microsample == microsample)) %>% #append sample metadata
  filter(quality>=5) %>%
  group_by(microsample,cryosection,phylum,genus) %>%
  summarise(relabun=sum(count)) %>%
  filter(genus != "g__") %>%
  mutate(genus= sub("^g__", "", genus))

genus_summary_sort <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
    arrange(-mean)

genus_summary_sort %>%
    tt()
tinytable_2i3m0d273zib6fky5p26
genus mean sd
Blautia 1.415165e-01 0.1073365208
Eisenbergiella 1.211367e-01 0.0072496627
Mediterraneibacter 9.882063e-02 0.0080567835
Choladocola 5.382332e-02 0.0181704527
Streptococcus 5.364124e-02 0.0446141635
Agathobaculum 4.394104e-02 0.0120987075
Caccovicinus 4.336548e-02 0.0151276583
Pelethomonas 3.697454e-02 0.0344834189
Lactobacillus 3.319425e-02 0.0257383125
Fimenecus 3.243428e-02 0.0302682673
Gallimonas 3.044755e-02 0.0285881065
Acutalibacter 2.897240e-02 0.0035123478
UBA1417 2.524574e-02 0.0083481457
Negativibacillus 2.239375e-02 0.0076181853
Lawsonibacter 2.185606e-02 0.0060775371
Anaerobutyricum 1.960099e-02 0.0021010163
Enterocloster 1.480431e-02 0.0074888423
Dysosmobacter 1.304275e-02 0.0093588049
Merdivicinus 1.203935e-02 0.0071049659
Lachnoclostridium_A 1.185888e-02 0.0031752503
Intestinimonas 1.159075e-02 0.0071633881
Rubneribacter 9.925375e-03 0.0013520870
Flavonifractor 9.403895e-03 0.0016224845
Clostridium_Q 8.511605e-03 0.0091097365
Thomasclavelia 8.455363e-03 0.0067802297
Anaeromassilibacillus 8.097524e-03 0.0078246041
Escherichia 7.594034e-03 0.0033677408
Copromonas 7.283530e-03 0.0025945191
Faecousia 7.123719e-03 0.0068608648
Ornithomonoglobus 6.336604e-03 0.0057056932
UMGS1370 5.551334e-03 0.0041535617
Anaerotignum 5.047360e-03 0.0045683428
Enterenecus 3.936354e-03 0.0141899497
Merdisoma 3.916381e-03 0.0036395670
HGM12545 3.633129e-03 0.0034388628
Borkfalkia 3.495036e-03 0.0017718409
Pullichristensenella 3.469869e-03 0.0032979124
Scatosoma 3.325432e-03 0.0031174044
Scatomorpha 3.161439e-03 0.0029680201
Sellimonas 3.093446e-03 0.0029526525
Klebsiella 2.695176e-03 0.0030438039
Lachnoclostridium_B 2.584244e-03 0.0024238099
Faecivivens 2.294980e-03 0.0022184880
Akkermansia 1.712760e-03 0.0016207176
Anaerostipes 1.348611e-03 0.0015525474
Anaerotruncus 1.222109e-03 0.0013483618
Evtepia 1.044212e-03 0.0011793775
Ruthenibacterium 9.754233e-04 0.0011289357
Gallacutalibacter 9.580371e-04 0.0012464595
Roslinia 8.184423e-04 0.0010738386
Scatavimonas 6.521948e-04 0.0008791553
HGM12998 2.844308e-04 0.0005424567
Merdimonas 2.816320e-04 0.0008337256
Massilimicrobiota 2.765415e-04 0.0005917204
Romboutsia 2.042220e-04 0.0004908791
UBA4716 1.315085e-04 0.0003607827
Clostridium_AQ 1.240170e-04 0.0003620509
Harrysmithimonas 9.739388e-05 0.0005322090
Gordonibacter 6.437947e-05 0.0002756017
Acetatifactor 0.000000e+00 0.0000000000
Alangreenwoodia 0.000000e+00 0.0000000000
Alistipes 0.000000e+00 0.0000000000
An181 0.000000e+00 0.0000000000
Angelakisella 0.000000e+00 0.0000000000
Avimicrobium 0.000000e+00 0.0000000000
Avoscillospira 0.000000e+00 0.0000000000
Bifidobacterium 0.000000e+00 0.0000000000
Blautia_A 0.000000e+00 0.0000000000
Butyricicoccus 0.000000e+00 0.0000000000
CAG-245 0.000000e+00 0.0000000000
CAG-269 0.000000e+00 0.0000000000
CAG-273 0.000000e+00 0.0000000000
CAG-302 0.000000e+00 0.0000000000
CAG-313 0.000000e+00 0.0000000000
CAJFPI01 0.000000e+00 0.0000000000
CAJFUH01 0.000000e+00 0.0000000000
Caccenecus 0.000000e+00 0.0000000000
Caccomorpha 0.000000e+00 0.0000000000
Caccousia 0.000000e+00 0.0000000000
Catenibacillus 0.000000e+00 0.0000000000
Coprocola 0.000000e+00 0.0000000000
Coproplasma 0.000000e+00 0.0000000000
Egerieicola 0.000000e+00 0.0000000000
Enterococcus 0.000000e+00 0.0000000000
Enterococcus_B 0.000000e+00 0.0000000000
Enterococcus_D 0.000000e+00 0.0000000000
Eubacterium_R 0.000000e+00 0.0000000000
Faecalibacterium 0.000000e+00 0.0000000000
Faeciplasma 0.000000e+00 0.0000000000
Fimicola 0.000000e+00 0.0000000000
Fimimorpha 0.000000e+00 0.0000000000
Fimivicinus 0.000000e+00 0.0000000000
Fournierella 0.000000e+00 0.0000000000
Gallispira 0.000000e+00 0.0000000000
Galloscillospira_A 0.000000e+00 0.0000000000
Gemmiger 0.000000e+00 0.0000000000
Heritagella 0.000000e+00 0.0000000000
Heteroclostridium 0.000000e+00 0.0000000000
Holdemania 0.000000e+00 0.0000000000
Hungatella_B 0.000000e+00 0.0000000000
JAETTH01 0.000000e+00 0.0000000000
Ligilactobacillus 0.000000e+00 0.0000000000
Limosilactobacillus 0.000000e+00 0.0000000000
Merdibacter 0.000000e+00 0.0000000000
Metalachnospira 0.000000e+00 0.0000000000
Neoanaerotignum_A 0.000000e+00 0.0000000000
Onthovicinus 0.000000e+00 0.0000000000
Ornithomonoglobus_A 0.000000e+00 0.0000000000
Paenibacillus_A 0.000000e+00 0.0000000000
Pararuminococcus 0.000000e+00 0.0000000000
Proteus 0.000000e+00 0.0000000000
Pseudobutyricicoccus 0.000000e+00 0.0000000000
Pullilachnospira 0.000000e+00 0.0000000000
RUG591 0.000000e+00 0.0000000000
RUG626 0.000000e+00 0.0000000000
Ruminococcus_G 0.000000e+00 0.0000000000
Salmonella 0.000000e+00 0.0000000000
Sarcina 0.000000e+00 0.0000000000
Scybalenecus 0.000000e+00 0.0000000000
Spyradocola 0.000000e+00 0.0000000000
Timburyella 0.000000e+00 0.0000000000
Tyzzerella 0.000000e+00 0.0000000000
UMGS775 0.000000e+00 0.0000000000
UMGS856 0.000000e+00 0.0000000000
genus_arrange <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=sum(relabun)) %>%
    filter(genus != "g__")%>%
    arrange(-mean) %>%
    select(genus) %>%
    mutate(genus= sub("^g__", "", genus)) %>%
    pull()

genus_summary %>%
    mutate(genus=factor(genus,levels=rev(genus_summary_sort %>% pull(genus)))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
        scale_color_manual(values=colors_alphabetic) +
        geom_jitter(alpha=0.3) + 
        facet_grid(.~cryosection)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")